from bs4 import BeautifulSoup

# Every item on the page lives under the same container path, so the shared
# prefix is written once and each field only appends its own tail.
ITEM_SELECTOR = 'body > div > div > div.col-md-9 > div > div > div'

# Open in binary mode so BeautifulSoup detects the document's encoding itself;
# text mode would decode with the platform's default encoding, which can fail
# or garble the text when that default differs from the page's encoding.
with open('./1_2_homework_required/index.html', 'rb') as web_data:
    # The constructor reads the whole handle, so the file can be closed
    # before any selecting happens.
    soup = BeautifulSoup(web_data, 'lxml')

images = soup.select(ITEM_SELECTOR + ' > img')
titles = soup.select(ITEM_SELECTOR + ' > div.caption > h4 > a')
reviews = soup.select(ITEM_SELECTOR + ' > div.ratings > p.pull-right')
prices = soup.select(ITEM_SELECTOR + ' > div.caption > h4.pull-right')
stars = soup.select(ITEM_SELECTOR + ' > div.ratings > p:nth-of-type(2)')

# zip() pairs the five lists positionally and stops at the shortest one, so an
# item missing any field shortens the output rather than raising an error.
for image, title, review, price, star in zip(images, titles, reviews, prices, stars):
    data = {
        'title': title.get_text(),   # visible text of the title link
        'image': image.get('src'),   # value of the <img> src attribute
        'review': review.get_text(),
        'price': price.get_text(),
        # Each filled star is rendered as one
        # <span class="glyphicon glyphicon-star"></span>, so the number of
        # matching spans is the star rating. find_all() returns a list and
        # len() counts its elements.
        'star': len(star.find_all("span", class_='glyphicon glyphicon-star')),
    }
    print(data)

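'''
Selector notes (kept for reference):
The first four selectors below target only the first item, via
`div:nth-of-type(1)`; dropping that `:nth-of-type(1)` gives the selectors
used in the script, which match every item. The fifth is the star-rating
selector exactly as the script uses it.

body > div > div > div.col-md-9 > div > div:nth-of-type(1) > div > img
body > div > div > div.col-md-9 > div > div:nth-of-type(1) > div > div.caption > h4 > a
body > div > div > div.col-md-9 > div > div:nth-of-type(1) > div > div.caption > h4.pull-right
body > div > div > div.col-md-9 > div > div:nth-of-type(1) > div > div.ratings > p.pull-right
body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2)
'''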